Creating
Dataset
Statistics
set.seed(7623)
n <- sample(35:4000, 1)
mu <- rnorm(1, 2.5, 4)
sd <- abs(rnorm(1, 2.5, 4))
# Generate datasets
ulusoy_data <- rnorm(n, mu, sd)
ulusoy_data2 <- rnorm(n, mu, 2.5*sd)
stats <- data.frame(
Statistic = c("Mean", "Variance", "Standard Deviation", "Median"),
Value = round(c(mean(ulusoy_data), var(ulusoy_data),
sd(ulusoy_data), median(ulusoy_data)), 4)
)
knitr::kable(stats, caption = "Statistics")
Statistics
| Mean |
11.0374 |
| Variance |
30.9130 |
| Standard Deviation |
5.5599 |
| Median |
11.4747 |
Histograms
par(mar = c(5, 4, 4, 4))
hist(ulusoy_data,
breaks = 30,
probability = TRUE,
col = rgb(0,0,1,0.5),
main = "Histograms",
xlab = "Values")
hist(ulusoy_data2,
breaks = 30,
probability = TRUE,
col = rgb(1,0,0,0.4),
add = TRUE)
legend("topright",
c("Original Data", "Data with 2.5*SD"),
fill = c(rgb(0,0,1,0.5), rgb(1,0,0,0.5)))

cat("Since standard deviation measures how spread out numbers are from the mean, higher the standard deviation, higher the data, hence a broader histogram.")
## Since standard deviation measures how spread out numbers are from the mean, higher the standard deviation, higher the data, hence a broader histogram.
Stock
Market
safe_get_stock <- function(symbol, date1, date2) {
tryCatch({
getSymbols(symbol,
src = "yahoo",
from = date1,
to = date2,
periodicity = "weekly",
auto.assign = FALSE)
}, error = function(e) {
message(paste("Error for", symbol, ":", e$message))
return(NULL)
})
}
analyze_stocks <- function(symbols, date1, date2) {
results <- list()
combined_data <- list()
for(symbol in symbols) {
stock_data <- safe_get_stock(symbol, date1, date2)
if(is.null(stock_data)) {
message(paste("Error"))
next
}
prices <- try(Cl(stock_data))
returns <- try({
ret <- diff(log(prices)) * 100
ret[!is.finite(ret)] <- NA
ret
})
roll_sd <- try(rollapply(returns, 8, sd, fill = NA))
combined_data[[symbol]] <- list(
prices = data.frame(
date = index(prices),
price = as.numeric(prices)
),
returns = data.frame(
date = index(returns),
returns = as.numeric(returns),
roll_sd = as.numeric(roll_sd)
)
)
results[[symbol]] <- list(
stats = try({
c(
Max = max(prices, na.rm = TRUE),
Min = min(prices, na.rm = TRUE),
Range = diff(range(prices, na.rm = TRUE)),
Average = mean(prices, na.rm = TRUE),
SD = sd(as.numeric(prices), na.rm = TRUE),
CV = sd(as.numeric(prices), na.rm = TRUE) /
mean(as.numeric(prices), na.rm = TRUE)
)
}),
corr = cor(returns, roll_sd, use = "complete.obs")
)
}
# 1. Combined price comparison
price_data <- merge(combined_data[[symbols[1]]]$prices,
combined_data[[symbols[2]]]$prices,
by = "date", suffixes = c("_1", "_2"))
price_plot <- ggplot(price_data, aes(x = date)) +
geom_line(aes(y = price_1, color = symbols[1])) +
geom_line(aes(y = price_2, color = symbols[2])) +
theme_minimal() +
labs(title = "Stock Price Comparison",
x = "Date",
y = "Price",
color = "Stock")
stats_text <- paste0(
symbols[1], " Statistics:\n",
"Max: ", round(results[[symbols[1]]]$stats["Max"], 2),
", Min: ", round(results[[symbols[1]]]$stats["Min"], 2),
", Range: ", round(results[[symbols[1]]]$stats["Range"], 2),
", Average: ", round(results[[symbols[1]]]$stats["Average"], 2),
", SD: ", round(results[[symbols[1]]]$stats["SD"], 2),
", CV: ", round(results[[symbols[1]]]$stats["CV"], 4), "\n\n",
symbols[2], " Statistics:\n",
"Max: ", round(results[[symbols[2]]]$stats["Max"], 2),
", Min: ", round(results[[symbols[2]]]$stats["Min"], 2),
", Range: ", round(results[[symbols[2]]]$stats["Range"], 2),
", Average: ", round(results[[symbols[2]]]$stats["Average"], 2),
", SD: ", round(results[[symbols[2]]]$stats["SD"], 2),
", CV: ", round(results[[symbols[2]]]$stats["CV"], 4), "\n\n",
"Risk Analysis: ",
ifelse(results[[symbols[1]]]$stats["CV"] > results[[symbols[2]]]$stats["CV"],
paste(symbols[1], "is riskier than", symbols[2]),
paste(symbols[2], "is riskier than", symbols[1])),
" because of higher Coefficient of Variation (CV), which means greater standard deviation (SD). As data spreads more out, \nit becomes riskier and volatile."
)
price_plot <- price_plot +
labs(caption = stats_text) +
theme(plot.caption = element_text(hjust = 0))
returns_data <- merge(combined_data[[symbols[1]]]$returns,
combined_data[[symbols[2]]]$returns,
by = "date", suffixes = c("_1", "_2"))
returns_plot <- ggplot(returns_data, aes(x = date)) +
geom_line(aes(y = returns_1, color = symbols[1])) +
geom_line(aes(y = returns_2, color = symbols[2])) +
theme_minimal() +
labs(title = "Comparing Weekly Returns",
x = "Date",
y = "Returns (%)",
color = "Stock")
volatility_plot <- ggplot(returns_data, aes(x = date)) +
geom_line(aes(y = roll_sd_1, color = symbols[1])) +
geom_line(aes(y = roll_sd_2, color = symbols[2])) +
theme_minimal() +
labs(title = "8 Week Rolling Standard Deviation",
x = "Date",
y = "Standard Deviation",
color = "Stock",
caption = paste("Volatility patterns indicate temporal clustering,",
"showing periods of high and low market stress",
"having similar impacts on both stocks."))
returns_vol_plots <- list()
for(i in 1:2) {
symbol <- symbols[i]
suffix <- paste0("_", i)
returns_vol_plots[[symbol]] <- ggplot(returns_data, aes(x = date)) +
geom_line(aes(y = get(paste0("returns", suffix)), color = "Returns")) +
geom_line(aes(y = get(paste0("roll_sd", suffix)), color = "Volatility")) +
theme_minimal() +
labs(title = paste(symbol, "Returns vs Volatility"),
x = "Date",
y = "Value",
color = "Metric",
caption = paste("Correlation coefficient:",
round(results[[symbol]]$corr, 3),
"\nNegative correlation shows",
"periods of high losses",
"usually come with great volatility."))
}
return(list(
price_plot = price_plot,
returns_plot = returns_plot,
volatility_plot = volatility_plot,
returns_vol_plots = returns_vol_plots,
statistics = results
))
}
# Usage example:
date_start <- as.Date("2005-05-05")
date_end <- as.Date("2023-10-01")
analysis <- analyze_stocks(c("MSFT", "AAPL"), date_start, date_end)
# Display plots
print(analysis$price_plot)

print(analysis$returns_plot)

print(analysis$volatility_plot)

print(analysis$returns_vol_plots$MSFT)

print(analysis$returns_vol_plots$AAPL)

date_start <- as.Date("2005-05-05")
date_end <- Sys.Date() - 1
stocks <- analyze_stocks(c("MSFT", "AAPL"), date_start, date_end)
if(!is.null(stocks$MSFT)) {
print(stocks$MSFT$prices)
}
if(!is.null(stocks$MSFT)) {
print(stocks$MSFT$returns)
}
if(!is.null(stocks$MSFT) && !inherits(stocks$MSFT$stats, "try-error")) {
knitr::kable(data.frame(
Statistic = names(stocks$MSFT$stats),
Value = round(stocks$MSFT$stats, 2)
), caption = "Microsoft Statistics")
}
if(!is.null(stocks$AAPL)) {
print(stocks$AAPL$prices)
}
if(!is.null(stocks$AAPL)) {
print(stocks$AAPL$returns)
}
if(!is.null(stocks$AAPL) && !inherits(stocks$AAPL$stats, "try-error")) {
knitr::kable(data.frame(
Statistic = names(stocks$AAPL$stats),
Value = round(stocks$AAPL$stats, 2)
), caption = "Apple Statistics")
}
Probability Calculations
Birthday Problem
birthday_probability <- function(n, x) {
if(x > n) return(0)
n_simulations <- 1000
successes <- 0
for(i in 1:n_simulations) {
birthdays <- sample(1:365, n, replace = TRUE)
counts <- table(birthdays)
successes <- successes + (max(counts) >= x)
}
return(successes / n_simulations)
}
n_values <- seq(1, 555, by = 10)
x_values <- 2:5
results <- data.frame(n = n_values)
for(x in x_values) {
probs <- sapply(n_values, function(n) birthday_probability(n, x))
results[paste0("x", x)] <- probs
}
library(ggplot2)
library(gridExtra)
# Plot for x = 2
p1 <- ggplot(results, aes(x = n, y = x2)) +
geom_line(color = "pink", size = 1) +
theme_minimal() +
labs(title = "At least 2 people sharing birthday",
x = "Class Size (n)",
y = "Probability") +
ylim(0, 1) +
theme(plot.title = element_text(size = 10))
# Plot for x = 3
p2 <- ggplot(results, aes(x = n, y = x3)) +
geom_line(color = "red", size = 1) +
theme_minimal() +
labs(title = "At least 3 people sharing birthday",
x = "Class Size (n)",
y = "Probability") +
ylim(0, 1) +
theme(plot.title = element_text(size = 10))
# Plot for x = 4
p3 <- ggplot(results, aes(x = n, y = x4)) +
geom_line(color = "turquoise", size = 1) +
theme_minimal() +
labs(title = "At least 4 people sharing birthday",
x = "Class Size (n)",
y = "Probability") +
ylim(0, 1) +
theme(plot.title = element_text(size = 10))
# Plot for x = 5
p4 <- ggplot(results, aes(x = n, y = x5)) +
geom_line(color = "yellow", size = 1) +
theme_minimal() +
labs(title = "At least 5 people sharing birthday",
x = "Class Size (n)",
y = "Probability") +
ylim(0, 1) +
theme(plot.title = element_text(size = 10))
# Combine all plots in a 2x2 grid
grid.arrange(p1, p2, p3, p4, ncol = 2)

cat("Had problems with generating a 3d graph, therefore found a solution this way.")
## Had problems with generating a 3d graph, therefore found a solution this way.
Coin Toss
simulate_coin_tosses <- function(p_heads = 0.5) {
N <- 500
results <- numeric(N)
cumulative_prob <- numeric(N)
for(i in 1:N) {
tosses <- rbinom(4, 1, p_heads)
results[i] <- sum(tosses) >= 1
cumulative_prob[i] <- mean(results[1:i])
}
return(data.frame(
experiment = 1:N,
cumulative_probability = cumulative_prob
))
}
# Running simulations
set.seed(123)
fair_coin_results <- simulate_coin_tosses(0.5)
unfair_coin_results <- simulate_coin_tosses(0.2)
# Fair coin
fair_coin_plot <- ggplot(fair_coin_results, aes(x = experiment, y = cumulative_probability)) +
geom_line(color = "green") +
geom_hline(yintercept = 1 - (1/2)^4, linetype = "dashed", color = "red") +
theme_minimal() +
labs(title = "Fair Coin (p = 0.5): Probability of At Least One Head in Four Tosses",
x = "Number of Experiments",
y = "Probability",
caption = "Red line is the theoretical probability = 0.9375") +
theme(text = element_text(size = 12),
plot.caption = element_text(size = 12)) +
ylim(0, 1)
# Unfair coin
unfair_coin_plot <- ggplot(unfair_coin_results, aes(x = experiment, y = cumulative_probability)) +
geom_line(color = "purple") +
geom_hline(yintercept = 1 - (0.8)^4, linetype = "dashed", color = "blue") +
theme_minimal() +
labs(title = "Unfair Coin (p = 0.2): Probability of At Least One Head in Four Tosses",
x = "Number of Experiments",
y = "Probability",
caption = "Blue line is the theoretical probability = 0.5904") +
theme(text = element_text(size = 12),
plot.caption = element_text(size = 12)) +
ylim(0, 1)
print(fair_coin_plot)

print(unfair_coin_plot)

Elections
simulate_election <- function(n_voters, p_A, p_B, p_C, n_trials = 500) {
sample_sizes <- c(5, seq(10, n_voters, by = 10))
win_matrix <- matrix(0, nrow = length(sample_sizes), ncol = 3)
colnames(win_matrix) <- c("A", "B", "C")
for(i in seq_along(sample_sizes)) {
current_n <- sample_sizes[i]
winners <- character(n_trials)
for(j in 1:n_trials) {
votes <- sample(c("A", "B", "C"), current_n,
prob = c(p_A, p_B, p_C),
replace = TRUE)
vote_counts <- table(factor(votes, levels = c("A", "B", "C")))
# Find out winner
max_votes <- max(vote_counts)
winners[j] <- sample(names(vote_counts)[vote_counts == max_votes], 1)
}
win_props <- table(factor(winners, levels = c("A", "B", "C"))) / n_trials
win_matrix[i,] <- win_props
}
# Create data frame
results_df <- data.frame(
sample_size = rep(sample_sizes, 3),
win_probability = c(win_matrix[,"A"], win_matrix[,"B"], win_matrix[,"C"]),
candidate = rep(c("A", "B", "C"), each = length(sample_sizes))
)
return(results_df)
}
# Run simulations for both scenarios
set.seed(123)
# Scenario 1: 0.55, 0.25, 0.20
results1 <- simulate_election(555, 0.55, 0.25, 0.20)
plot1 <- ggplot(results1, aes(x = sample_size, y = win_probability, color = candidate)) +
geom_line() +
geom_hline(yintercept = 0.55, linetype = "dashed", color = "orange", alpha = 0.5) +
geom_hline(yintercept = 0.25, linetype = "dashed", color = "green", alpha = 0.5) +
geom_hline(yintercept = 0.20, linetype = "dashed", color = "blue", alpha = 0.5) +
theme_minimal() +
labs(title = "Convergence of Winning Probabilities (Voting Ratios: A=0.55, B=0.25, C=0.20)",
x = "Sample Size (n)",
y = "Winning Probability",
color = "Candidate") +
ylim(0, 1)
# Scenario 2: 0.42, 0.48, 0.1
results2 <- simulate_election(555, 0.42, 0.48, 0.10)
plot2 <- ggplot(results2, aes(x = sample_size, y = win_probability, color = candidate)) +
geom_line() +
geom_hline(yintercept = 0.42, linetype = "dashed", color = "orange", alpha = 0.5) +
geom_hline(yintercept = 0.48, linetype = "dashed", color = "green", alpha = 0.5) +
geom_hline(yintercept = 0.1, linetype = "dashed", color = "blue", alpha = 0.5) +
theme_minimal() +
labs(title = "Convergence of Winning Probabilities (Voting Ratios: A=0.42, B=0.48, C=0.10)",
x = "Sample Size (n)",
y = "Winning Probability",
color = "Candidate") +
ylim(0, 1)
print(plot1)

print(plot2)

# Analysis
cat("\nAnalysis of Election Winning Probabilities:\n\n")
##
## Analysis of Election Winning Probabilities:
cat("1. Scenario 1 (0.55, 0.25, 0.20):\n")
## 1. Scenario 1 (0.55, 0.25, 0.20):
cat(" - Winning probabilities don't converge to voting ratios\n")
## - Winning probabilities don't converge to voting ratios
cat(" - Candidate A wins much more frequently than his voting ratio\n")
## - Candidate A wins much more frequently than his voting ratio
cat("2. Scenario 2 (0.42, 0.48, 0.1):\n")
## 2. Scenario 2 (0.42, 0.48, 0.1):
cat(" - In the 2nd scenerio I tried to simulate a probable Turkish election\n")
## - In the 2nd scenerio I tried to simulate a probable Turkish election
cat(" - There is a competition between Reis and Primary CHP Candidate\n")
## - There is a competition between Reis and Primary CHP Candidate
cat(" - Even though it is a low one, REİS still has a chance\n")
## - Even though it is a low one, REİS still has a chance
cat(" - DEM has no chance of winning despite a 10% vote share\n")
## - DEM has no chance of winning despite a 10% vote share
cat(" - This demonstrates how plurality voting can affect minorities disportionately\n\n")
## - This demonstrates how plurality voting can affect minorities disportionately
cat("3. Additional Analysis Possibilities:\n")
## 3. Additional Analysis Possibilities:
cat(" - Majority or Plurality Trends Over Time\n")
## - Majority or Plurality Trends Over Time
cat(" - Sensitivity Analysis\n")
## - Sensitivity Analysis
cat(" - Polarization Analysis in Highly Polarized Countries\n")
## - Polarization Analysis in Highly Polarized Countries
cat(" - Coalition scenarios\n")
## - Coalition scenarios